#!/usr/bin/env python
# -*- coding: utf-8 -*-

import sys,os
lib_path = os.path.abspath('../../')
sys.path.append(lib_path)

import re
from tool_interval import *

# Demo script: find every match of three regular expressions in a sample
# text, wrap the matches in HTML tags, and print the result to stdout as an
# HTML fragment.
#
# `Sequence` and `Tool_sequencing` are not defined in this file; they are
# expected to come from the star import of `tool_interval` above.

# Sample text to annotate (adjacent literals are concatenated into one string).
str1 = (
    'Lorem ipsum dolor sit amet, consectetur adipiscing elit. '
    'Vivamus imperdiet arcu ut dui molestie feugiat. '
    'Aliquam erat volutpat. '
    'Vivamus quis turpis tortor, non tristique ante. '
    'Etiam ullamcorper eros ut ante porta venenatis nec ac tortor. '
    'Aenean quis elit elit. '
    'Lorem ipsum dolor sit amet, consectetur adipiscing elit. '
    'Suspendisse sagittis molestie consequat. '
    'Quisque posuere fringilla consequat. '
    'In vel lorem id erat pellentesque egestas ut eu risus. '
    'Donec quis dolor elit, id facilisis mauris. '
    'Nullam et velit in leo scelerisque ornare vel in eros.'
)

# On Python 2 a plain literal is a byte string and is decoded to text here
# (same result as unicode(str1, 'utf-8', 'replace')). On Python 3 the literal
# is already text and is used unchanged.
if isinstance(str1, bytes):
    str1_unicode = str1.decode('utf-8', 'replace')
else:
    str1_unicode = str1

# Patterns to highlight, one per tag pair in `t` below (same order).
pat1 = r'sum'
pat2 = r'[^ ]+sum'
pat3 = r'[dt]or'


def _match_intervals(pattern, text):
    """Return a Sequence built from the span of every match of *pattern*.

    Each match contributes a ``(first, last)`` pair where ``last`` is
    ``m.end() - 1``, i.e. the index of the final matched character, so the
    pair is inclusive at both ends.
    """
    spans = [(m.start(), m.end() - 1) for m in re.finditer(pattern, text)]
    return Sequence(spans)


u1 = _match_intervals(pat1, str1_unicode)
u2 = _match_intervals(pat2, str1_unicode)
u3 = _match_intervals(pat3, str1_unicode)

# NOTE(review): union() presumably merges overlapping intervals within each
# sequence -- confirm against tool_interval.
u = [u1.union(), u2.union(), u3.union()]
# (opening tag, closing tag) applied to the matches of pat1, pat2, pat3.
t = [('<u>', '</u>'), ('<b>', '</b>'), ('<i>', '</i>')]
ts = Tool_sequencing()
smart_r = ts.segment_text_multitag(str1_unicode, u, t)

# Legend: each pattern shown inside the tag pair used to highlight it.
# The patterns are inserted verbatim; they would need HTML escaping if they
# ever contained '&', '<' or '>'.
legend = ', '.join(
    '%s%s%s' % (tag_open, pattern, tag_close)
    for (tag_open, tag_close), pattern in zip(t, (pat1, pat2, pat3))
)

# Single-argument print(...) produces the same output under the Python 2
# print statement and the Python 3 print function.
print('<div style="font-size: 10pt;text-align:justify; font-family:Bitstream vera sans; width:300px;">')
print('<p>%s</p>' % legend)
print(smart_r)
print('</div>')
